import os
import jieba
import glob
import tqdm


# Root directory of the extracted THUCNews corpus, relative to the current
# working directory.
data_path = "THUCNews"
if not os.path.exists(data_path):
    # Fail early, before any output is created, and point at the download URL.
    raise FileNotFoundError("download dataurl:https://thunlp.oss-cn-qingdao.aliyuncs.com/THUCNews.zip")

# Pattern matching every .txt file exactly one directory level below the
# corpus root.
golb_file_path = os.path.join(data_path, "*/*.txt")
# glob.glob returns matches in arbitrary, filesystem-dependent order; sort the
# paths so the files are processed in the same order on every run and machine.
txts = sorted(glob.glob(golb_file_path))


# Segment every collected file with jieba and append the result to a single
# corpus file: one output line per input line, tokens joined by single spaces.
with open("jieba_cut.txt", mode="w", encoding="utf-8") as fwrite:
    for txt in tqdm.tqdm(txts):
        # Read each file inside its own context manager so the handle is closed
        # immediately instead of lingering until garbage collection; with a
        # large number of input files, leaked handles can exhaust descriptors.
        with open(txt, encoding="utf-8") as fread:
            content = fread.read()
        # split('\n') (not splitlines()) keeps the trailing empty segment, so a
        # file ending in a newline still contributes one extra output line.
        texts = content.split("\n")
        for text in texts:
            # str.join consumes the token iterable from jieba.cut directly; no
            # intermediate list is needed.
            fwrite.write(" ".join(jieba.cut(text)) + "\n")





